function [last_episode]=simulation_exp(alpha1,beta1,alpha2,beta2,T,vh,vl,mu_l,sigma_l,grid_middle,tick,grid_size)
%SIMULATION_EXP Run one Q-learning experiment with two market makers (AMMs).
%
%   last_episode = simulation_exp(alpha1,beta1,alpha2,beta2,T,vh,vl, ...
%                                 mu_l,sigma_l,grid_middle,tick,grid_size)
%
%   Runs a learning experiment once, for T episodes. The two AMMs may have
%   different learning rates (alpha) and experimentation decay rates (beta).
%   Works only for 2 AMMs.
%
%   Inputs:
%     alpha1, alpha2 - weight put on the realized profit when the Q-value
%                      of the price just played is updated (AMM 1 / AMM 2).
%     beta1, beta2   - decay rates; AMM n experiments in episode t with
%                      probability exp(-beta_n*t).
%     T              - number of episodes (positive integer scalar).
%     vh, vl         - the two possible asset values, each drawn with
%                      probability 0.5 in every episode.
%     mu_l, sigma_l  - mean and standard deviation of the normal shock l
%                      added to the asset value to get the investor's
%                      valuation vc = v_tilde + l.
%     grid_middle    - price at the centre of the price grid.
%     tick           - distance between two adjacent grid prices.
%     grid_size      - number of ticks on each side of grid_middle; the grid
%                      has 2*grid_size+1 prices.
%
%   Output:
%     last_episode   - 1x6 vector [A_1, A_2, v_tilde, l, profit_1, profit_2]:
%                      the prices quoted by AMM 1 and AMM 2 in episode T,
%                      the asset value and shock drawn in episode T, and
%                      each AMM's profit summed over all T episodes.
%
%   Errors:
%     simulation_exp:invalidT if T is not a positive integer scalar.
%
%   Note: uses randsample, normrnd and binornd (Statistics and Machine
%   Learning Toolbox). The order of random draws is: Q initialization,
%   asset values, shocks, experimentation flags, then per-episode price
%   choices.

%Reject an unusable horizon up front: with T < 1 the episode loop never
%runs and there is no "last episode" to report.
if ~(isnumeric(T) && isscalar(T) && isfinite(T) && T >= 1 && T == floor(T))
    error('simulation_exp:invalidT', ...
          'T must be a positive integer scalar (number of episodes).');
end

n_prices  = 2*grid_size + 1;                           %number of prices on the grid
%Price with index k is grid_base + tick*k, so index 1 maps to
%grid_middle - grid_size*tick and index n_prices to grid_middle + grid_size*tick.
grid_base = (grid_middle - grid_size*tick - tick);

%Pre-allocate the per-episode profit of each AMM (zero unless it trades).
profits = zeros(T,2);

%Initialize Q-matrix with random values in [3,6], chosen to be higher than
%the monopoly expected profit (optimistic initialization).
Q_n = 3+(6-3)*rand(n_prices,2);

%Generate a vector of T observations with prob=0.5 to be vl and prob=0.5 to be vh.
v_tilde = randsample([vl, vh], T, true)';             %value of the asset in each episode
l  = normrnd(mu_l,sigma_l,T,1);                       %random draw of l in each episode
vc = v_tilde + l;                                     %investor valuation in each episode

%Experimentation probabilities, one vector per AMM.
epsilon1 = exp(-beta1*(1:1:T));
epsilon2 = exp(-beta2*(1:1:T));

%T x 2 matrix with 1 in (t,n) if AMM n experiments at time t.
Experiment = zeros(T,2);
Experiment(:,1) = binornd(1,epsilon1);
Experiment(:,2) = binornd(1,epsilon2);

alpha = [alpha1, alpha2];                             %learning rate of each AMM

%Loop over all episodes.
for t = 1:T
    %Index of the price (from 1 to n_prices) chosen by each AMM.
    s = zeros(1,2);
    for i = 1:2
        %Greedy price: the one with the highest Q-value. Collecting every
        %maximizer is irrelevant when the Q-matrix holds continuous random
        %values, but matters once ties can occur.
        max_q     = max(Q_n(:,i));
        maxvector = find(Q_n(:,i) == max_q);
        s(1,i)    = maxvector(randi([1 length(maxvector)],1,1)); %randomize among tied greedy prices

        %If AMM i experiments in this episode, replace the greedy price
        %with a uniformly random price index.
        if Experiment(t,i) == 1
            s(1,i) = randi([1 n_prices],1,1);
        end
    end

    %Profit in episode t. The investor buys if vc is at least the best
    %(lowest) price a_min; the winning AMM earns (a_min - v_tilde). If both
    %AMMs quote a_min the profit is split equally between them.
    smin = min(s(1,1:2));                             %lowest price index
    pmin = grid_base + tick*smin;                     %lowest price

    if pmin <= vc(t)                                  %the investor buys
        winners = (s(1,1:2) == smin);                 %AMMs quoting a_min
        profits(t,winners) = (pmin - v_tilde(t)) / sum(winners);
        %All other AMMs, or everyone if the investor does not buy, keep
        %the pre-allocated profit of zero.
    end

    %Update, for each AMM, the Q-value of the price actually played.
    for i = 1:2
        Q_n(s(1,i),i) = alpha(i)*profits(t,i) + (1-alpha(i))*Q_n(s(1,i),i);
    end
end

%Record the last episode: A_1, A_2, v_tilde, l, total profit AMM1, total profit AMM2.
last_episode = [grid_base + tick*s(1,1), grid_base + tick*s(1,2), v_tilde(T,1), l(T,1), sum(profits(:,1)), sum(profits(:,2))];
